import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Set up plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Load ACLED Data 2025
acled_data = pd.read_csv('ACLED Data_2025-09-19.csv')
# Display basic information about the dataset
print("ACLED Data 2025 - Dataset Overview")
print("=" * 50)
print(f"Shape: {acled_data.shape}")
print(f"Columns: {list(acled_data.columns)}")
print(f"Date range: {acled_data['year'].min()} - {acled_data['year'].max()}")
print(f"Countries: {acled_data['country'].nunique()}")
print(f"Total events: {len(acled_data):,}")
print(f"Total fatalities: {acled_data['fatalities'].sum():,}")
print("\nFirst few rows:")
acled_data.head()
ACLED Data 2025 - Dataset Overview ================================================== Shape: (19739, 31) Columns: ['event_id_cnty', 'event_date', 'year', 'time_precision', 'disorder_type', 'event_type', 'sub_event_type', 'actor1', 'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2', 'interaction', 'civilian_targeting', 'iso', 'region', 'country', 'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude', 'geo_precision', 'source', 'source_scale', 'notes', 'fatalities', 'tags', 'timestamp'] Date range: 2010 - 2025 Countries: 9 Total events: 19,739 Total fatalities: 19,555 First few rows:
| event_id_cnty | event_date | year | time_precision | disorder_type | event_type | sub_event_type | actor1 | assoc_actor_1 | inter1 | ... | location | latitude | longitude | geo_precision | source | source_scale | notes | fatalities | tags | timestamp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | MMR1 | 2010-01-01 | 2010 | 1 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (1988-2011) | DKBA (Buddhist): Democratic Karen Buddhist Arm... | State forces | ... | Kyainseikgyi | 16.0408 | 98.1232 | 2 | Democratic Voice of Burma | National | On 1 January 2010, the Democratic Karen Buddhi... | 0 | NaN | 1552577624 |
| 1 | MMR2 | 2010-01-02 | 2010 | 1 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (1988-2011) | DKBA (Buddhist): Democratic Karen Buddhist Arm... | State forces | ... | Kyainseikgyi | 16.0408 | 98.1232 | 2 | Democratic Voice of Burma | National | On 2 January 2010, 40 Democratic Karen Buddhis... | 0 | NaN | 1552577624 |
| 2 | MMR4 | 2010-01-07 | 2010 | 1 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (1988-2011) | NaN | State forces | ... | Thanbyuzayat | 15.9689 | 97.7274 | 2 | Irrawaddy | National | On 7 January 2010, in Thanbyuzayat township, M... | 0 | NaN | 1561473737 |
| 3 | KHM1883 | 2010-01-10 | 2010 | 1 | Political violence | Violence against civilians | Attack | Police Forces of Cambodia (1993-) | NaN | State forces | ... | Ta Khmau | 11.4464 | 104.9455 | 3 | US State Department | Other | On 10 January 2010, two commune officers beat ... | 1 | NaN | 1618561247 |
| 4 | THA7099 | 2010-01-10 | 2010 | 1 | Political violence | Violence against civilians | Attack | Police Forces of Thailand (2008-2011) Border P... | NaN | State forces | ... | Si Sa Ket | 15.1148 | 104.3293 | 3 | US State Department | Other | On 10 January 2010, border rangers shot at a g... | 2 | NaN | 1623191713 |
5 rows × 31 columns
# Prepare data for time series analysis
# Convert date column to datetime
acled_data['event_date'] = pd.to_datetime(acled_data['event_date'])
# Filter for Myanmar and Kachin State
myanmar_data = acled_data[acled_data['country'] == 'Myanmar'].copy()
kachin_data = myanmar_data[myanmar_data['admin1'] == 'Kachin'].copy()
print(f"Kachin State events: {len(kachin_data)}")
print(f"Date range: {kachin_data['event_date'].min()} to {kachin_data['event_date'].max()}")
# Display basic Kachin data info
print(f"\nKachin State Overview:")
print(f"Total fatalities: {kachin_data['fatalities'].sum()}")
print(f"Districts: {sorted(kachin_data['admin2'].unique())}")
print(f"Event types: {kachin_data['event_type'].value_counts().head()}")
kachin_data.head()
Kachin State events: 829 Date range: 2010-05-23 00:00:00 to 2025-09-11 00:00:00 Kachin State Overview: Total fatalities: 526 Districts: ['Bhamo', 'Mohnyin', 'Myitkyina', 'Puta-O'] Event types: Violence against civilians 419 Explosions/Remote violence 410 Name: event_type, dtype: int64
| event_id_cnty | event_date | year | time_precision | disorder_type | event_type | sub_event_type | actor1 | assoc_actor_1 | inter1 | ... | location | latitude | longitude | geo_precision | source | source_scale | notes | fatalities | tags | timestamp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 36 | MMR59 | 2010-05-23 | 2010 | 1 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (1988-2011) | NaN | State forces | ... | Myitkyina | 25.3832 | 97.3963 | 3 | Kachin News Group | Subnational | On 23 May 2010, the Myanmar military arrested ... | 0 | NaN | 1702344110 |
| 121 | MMR275 | 2011-06-09 | 2011 | 1 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (2011-2016) | NaN | State forces | ... | Momauk | 24.2508 | 97.3481 | 2 | Kachin News Group | Subnational | On 9 June 2011, in Sang Gang village, Momauk t... | 1 | NaN | 1702344110 |
| 122 | MMR8328 | 2011-06-13 | 2011 | 1 | Political violence | Violence against civilians | Sexual violence | Military Forces of Myanmar (2011-2016) | NaN | State forces | ... | Myitkyina | 25.3832 | 97.3963 | 3 | Women's League of Burma | Other | On 13 June 2011, in Ahlaw Bum area (coded for ... | 0 | NaN | 1563887830 |
| 124 | MMR8329 | 2011-06-14 | 2011 | 1 | Political violence | Violence against civilians | Sexual violence | Military Forces of Myanmar (2011-2016) | NaN | State forces | ... | Momauk | 24.2508 | 97.3481 | 1 | Kachin Women's Association Thailand | Other | On 14 June 2011, in Momauk Township, in Bhamo ... | 0 | NaN | 1708472559 |
| 125 | MMR6875 | 2011-06-15 | 2011 | 3 | Political violence | Violence against civilians | Attack | Military Forces of Myanmar (2011-2016) | NaN | State forces | ... | Puta-O | 27.3526 | 97.4004 | 1 | UN (unspecified) | Other | In June 2011, in Puta-O Township, Kachin State... | 1 | NaN | 1567539407 |
5 rows × 31 columns
# Create quarterly time series data for Kachin State
print("Creating Quarterly Time Series Data for Kachin State")
print("=" * 60)
# Create quarterly periods
kachin_data['quarter'] = kachin_data['event_date'].dt.to_period('Q')
# Aggregate events by quarter
quarterly_events = kachin_data.groupby('quarter').agg({
'event_id_cnty': 'count', # Count of events
'fatalities': ['sum', 'mean'], # Total and average fatalities
'event_date': ['min', 'max'] # Date range for each quarter
}).round(2)
# Flatten column names
quarterly_events.columns = ['events_count', 'total_fatalities', 'avg_fatalities', 'quarter_start', 'quarter_end']
quarterly_events = quarterly_events.reset_index()
# Create a clean two-column dataset: Time and Events
kachin_time_series = quarterly_events[['quarter', 'events_count']].copy()
kachin_time_series.columns = ['Time', 'Events']
# Convert quarter to string for better readability
kachin_time_series['Time'] = kachin_time_series['Time'].astype(str)
print("Quarterly Time Series Data for Kachin State:")
print("-" * 50)
print(kachin_time_series)
print(f"\nDataset Shape: {kachin_time_series.shape}")
print(f"Time Period: {kachin_time_series['Time'].min()} to {kachin_time_series['Time'].max()}")
print(f"Total Events: {kachin_time_series['Events'].sum()}")
print(f"Average Events per Quarter: {kachin_time_series['Events'].mean():.2f}")
# Save to CSV for external use
kachin_time_series.to_csv('kachin_quarterly_events.csv', index=False)
print(f"\nDataset saved as 'kachin_quarterly_events.csv'")
Creating Quarterly Time Series Data for Kachin State
============================================================
Quarterly Time Series Data for Kachin State:
--------------------------------------------------
Time Events
0 2010Q2 1
1 2011Q2 16
2 2011Q3 6
3 2011Q4 34
4 2012Q1 5
5 2012Q2 11
6 2012Q3 5
7 2012Q4 10
8 2013Q1 7
9 2013Q3 5
10 2013Q4 9
11 2014Q1 4
12 2014Q2 2
13 2014Q3 1
14 2014Q4 2
15 2015Q1 2
16 2015Q2 8
17 2015Q3 2
18 2015Q4 9
19 2016Q2 8
20 2016Q3 9
21 2016Q4 27
22 2017Q1 11
23 2017Q2 3
24 2017Q3 9
25 2017Q4 8
26 2018Q1 12
27 2018Q2 15
28 2018Q3 10
29 2018Q4 3
30 2019Q1 2
31 2019Q2 2
32 2020Q3 1
33 2021Q1 7
34 2021Q2 65
35 2021Q3 20
36 2021Q4 13
37 2022Q1 15
38 2022Q2 9
39 2022Q3 23
40 2022Q4 27
41 2023Q1 32
42 2023Q2 32
43 2023Q3 50
44 2023Q4 19
45 2024Q1 33
46 2024Q2 53
47 2024Q3 22
48 2024Q4 17
49 2025Q1 56
50 2025Q2 43
51 2025Q3 34
Dataset Shape: (52, 2)
Time Period: 2010Q2 to 2025Q3
Total Events: 829
Average Events per Quarter: 15.94
Dataset saved as 'kachin_quarterly_events.csv'
# Additional analysis: Monthly data aggregated to quarters with more details
print("Detailed Quarterly Analysis with Additional Metrics")
print("=" * 60)
# Create more detailed quarterly aggregation
detailed_quarterly = kachin_data.groupby('quarter').agg({
'event_id_cnty': 'count', # Event count
'fatalities': ['sum', 'mean', 'max'], # Fatalities metrics
'event_type': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Unknown', # Most common event type
'admin2': lambda x: x.nunique(), # Number of districts affected
'actor1': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Unknown' # Most common actor
}).round(2)
# Flatten column names
detailed_quarterly.columns = ['events_count', 'total_fatalities', 'avg_fatalities', 'max_fatalities',
'most_common_event_type', 'districts_affected', 'most_common_actor']
detailed_quarterly = detailed_quarterly.reset_index()
print("Detailed Quarterly Data:")
print(detailed_quarterly)
# Create extended time series with multiple metrics
extended_time_series = detailed_quarterly[['quarter', 'events_count', 'total_fatalities', 'avg_fatalities']].copy()
extended_time_series.columns = ['Time', 'Events', 'Fatalities', 'Avg_Fatalities_Per_Event']
extended_time_series['Time'] = extended_time_series['Time'].astype(str)
print(f"\nExtended Time Series Data (4 columns):")
print(extended_time_series)
# Save extended dataset
extended_time_series.to_csv('kachin_quarterly_extended.csv', index=False)
print(f"\nExtended dataset saved as 'kachin_quarterly_extended.csv'")
# Summary statistics
print(f"\nSUMMARY STATISTICS:")
print(f"Quarters with data: {len(detailed_quarterly)}")
print(f"Total events: {detailed_quarterly['events_count'].sum()}")
print(f"Total fatalities: {detailed_quarterly['total_fatalities'].sum()}")
print(f"Average events per quarter: {detailed_quarterly['events_count'].mean():.2f}")
print(f"Average fatalities per quarter: {detailed_quarterly['total_fatalities'].mean():.2f}")
print(f"Most violent quarter: {detailed_quarterly.loc[detailed_quarterly['total_fatalities'].idxmax(), 'quarter']} ({detailed_quarterly['total_fatalities'].max()} fatalities)")
print(f"Most active quarter: {detailed_quarterly.loc[detailed_quarterly['events_count'].idxmax(), 'quarter']} ({detailed_quarterly['events_count'].max()} events)")
Detailed Quarterly Analysis with Additional Metrics
============================================================
Detailed Quarterly Data:
quarter events_count total_fatalities avg_fatalities max_fatalities \
0 2010Q2 1 0 0.00 0
1 2011Q2 16 23 1.44 7
2 2011Q3 6 5 0.83 2
3 2011Q4 34 10 0.29 1
4 2012Q1 5 6 1.20 3
5 2012Q2 11 4 0.36 2
6 2012Q3 5 2 0.40 1
7 2012Q4 10 0 0.00 0
8 2013Q1 7 5 0.71 4
9 2013Q3 5 2 0.40 2
10 2013Q4 9 2 0.22 1
11 2014Q1 4 4 1.00 3
12 2014Q2 2 1 0.50 1
13 2014Q3 1 0 0.00 0
14 2014Q4 2 2 1.00 1
15 2015Q1 2 3 1.50 3
16 2015Q2 8 4 0.50 1
17 2015Q3 2 2 1.00 1
18 2015Q4 9 1 0.11 1
19 2016Q2 8 3 0.38 1
20 2016Q3 9 0 0.00 0
21 2016Q4 27 3 0.11 1
22 2017Q1 11 2 0.18 1
23 2017Q2 3 3 1.00 3
24 2017Q3 9 5 0.56 3
25 2017Q4 8 6 0.75 2
26 2018Q1 12 17 1.42 10
27 2018Q2 15 5 0.33 2
28 2018Q3 10 0 0.00 0
29 2018Q4 3 0 0.00 0
30 2019Q1 2 1 0.50 1
31 2019Q2 2 0 0.00 0
32 2020Q3 1 0 0.00 0
33 2021Q1 7 0 0.00 0
34 2021Q2 65 10 0.15 3
35 2021Q3 20 11 0.55 1
36 2021Q4 13 3 0.23 1
37 2022Q1 15 11 0.73 2
38 2022Q2 9 3 0.33 2
39 2022Q3 23 13 0.57 6
40 2022Q4 27 82 3.04 78
41 2023Q1 32 14 0.44 3
42 2023Q2 32 15 0.47 3
43 2023Q3 50 14 0.28 3
44 2023Q4 19 17 0.89 5
45 2024Q1 33 11 0.33 3
46 2024Q2 53 21 0.40 5
47 2024Q3 22 6 0.27 3
48 2024Q4 17 17 1.00 9
49 2025Q1 56 86 1.54 36
50 2025Q2 43 36 0.84 15
51 2025Q3 34 35 1.03 17
most_common_event_type districts_affected \
0 Violence against civilians 1
1 Violence against civilians 4
2 Violence against civilians 3
3 Violence against civilians 3
4 Violence against civilians 3
5 Violence against civilians 2
6 Violence against civilians 2
7 Explosions/Remote violence 3
8 Explosions/Remote violence 3
9 Violence against civilians 2
10 Violence against civilians 3
11 Violence against civilians 2
12 Violence against civilians 1
13 Violence against civilians 1
14 Violence against civilians 2
15 Explosions/Remote violence 1
16 Violence against civilians 4
17 Violence against civilians 2
18 Violence against civilians 4
19 Violence against civilians 3
20 Violence against civilians 2
21 Explosions/Remote violence 3
22 Explosions/Remote violence 3
23 Violence against civilians 2
24 Violence against civilians 3
25 Violence against civilians 3
26 Violence against civilians 4
27 Explosions/Remote violence 4
28 Violence against civilians 3
29 Violence against civilians 3
30 Violence against civilians 1
31 Violence against civilians 1
32 Violence against civilians 1
33 Violence against civilians 3
34 Explosions/Remote violence 4
35 Violence against civilians 4
36 Violence against civilians 3
37 Violence against civilians 4
38 Violence against civilians 3
39 Violence against civilians 4
40 Violence against civilians 3
41 Violence against civilians 4
42 Explosions/Remote violence 3
43 Violence against civilians 3
44 Explosions/Remote violence 3
45 Explosions/Remote violence 4
46 Explosions/Remote violence 4
47 Explosions/Remote violence 3
48 Explosions/Remote violence 4
49 Explosions/Remote violence 3
50 Explosions/Remote violence 3
51 Explosions/Remote violence 4
most_common_actor
0 Military Forces of Myanmar (1988-2011)
1 Military Forces of Myanmar (2011-2016)
2 Military Forces of Myanmar (2011-2016)
3 Military Forces of Myanmar (2011-2016)
4 Military Forces of Myanmar (2011-2016)
5 Military Forces of Myanmar (2011-2016)
6 Military Forces of Myanmar (2011-2016)
7 Military Forces of Myanmar (2011-2016)
8 Military Forces of Myanmar (2011-2016)
9 Military Forces of Myanmar (2011-2016)
10 Military Forces of Myanmar (2011-2016)
11 Military Forces of Myanmar (2011-2016)
12 Military Forces of Myanmar (2011-2016)
13 Military Forces of Myanmar (2011-2016)
14 Military Forces of Myanmar (2011-2016)
15 Military Forces of Myanmar (2011-2016)
16 Military Forces of Myanmar (2011-2016)
17 Military Forces of Myanmar (2011-2016)
18 Military Forces of Myanmar (2011-2016)
19 Military Forces of Myanmar (2016-2021)
20 Military Forces of Myanmar (2016-2021)
21 Military Forces of Myanmar (2016-2021)
22 Military Forces of Myanmar (2016-2021)
23 Military Forces of Myanmar (2016-2021)
24 Military Forces of Myanmar (2016-2021)
25 Military Forces of Myanmar (2016-2021)
26 Military Forces of Myanmar (2016-2021)
27 Military Forces of Myanmar (2016-2021)
28 Military Forces of Myanmar (2016-2021)
29 Military Forces of Myanmar (2016-2021)
30 Military Forces of Myanmar (2016-2021)
31 Police Forces of Myanmar (2016-2021)
32 Military Forces of Myanmar (2016-2021) People'...
33 Military Forces of Myanmar (2021-)
34 Military Forces of Myanmar (2021-)
35 Military Forces of Myanmar (2021-)
36 Military Forces of Myanmar (2021-)
37 Military Forces of Myanmar (2021-)
38 Military Forces of Myanmar (2021-)
39 Military Forces of Myanmar (2021-)
40 Military Forces of Myanmar (2021-)
41 Military Forces of Myanmar (2021-)
42 Military Forces of Myanmar (2021-)
43 Military Forces of Myanmar (2021-)
44 Military Forces of Myanmar (2021-)
45 Military Forces of Myanmar (2021-)
46 Military Forces of Myanmar (2021-)
47 Military Forces of Myanmar (2021-)
48 Military Forces of Myanmar (2021-)
49 Military Forces of Myanmar (2021-)
50 Military Forces of Myanmar (2021-)
51 Military Forces of Myanmar (2021-)
Extended Time Series Data (4 columns):
Time Events Fatalities Avg_Fatalities_Per_Event
0 2010Q2 1 0 0.00
1 2011Q2 16 23 1.44
2 2011Q3 6 5 0.83
3 2011Q4 34 10 0.29
4 2012Q1 5 6 1.20
5 2012Q2 11 4 0.36
6 2012Q3 5 2 0.40
7 2012Q4 10 0 0.00
8 2013Q1 7 5 0.71
9 2013Q3 5 2 0.40
10 2013Q4 9 2 0.22
11 2014Q1 4 4 1.00
12 2014Q2 2 1 0.50
13 2014Q3 1 0 0.00
14 2014Q4 2 2 1.00
15 2015Q1 2 3 1.50
16 2015Q2 8 4 0.50
17 2015Q3 2 2 1.00
18 2015Q4 9 1 0.11
19 2016Q2 8 3 0.38
20 2016Q3 9 0 0.00
21 2016Q4 27 3 0.11
22 2017Q1 11 2 0.18
23 2017Q2 3 3 1.00
24 2017Q3 9 5 0.56
25 2017Q4 8 6 0.75
26 2018Q1 12 17 1.42
27 2018Q2 15 5 0.33
28 2018Q3 10 0 0.00
29 2018Q4 3 0 0.00
30 2019Q1 2 1 0.50
31 2019Q2 2 0 0.00
32 2020Q3 1 0 0.00
33 2021Q1 7 0 0.00
34 2021Q2 65 10 0.15
35 2021Q3 20 11 0.55
36 2021Q4 13 3 0.23
37 2022Q1 15 11 0.73
38 2022Q2 9 3 0.33
39 2022Q3 23 13 0.57
40 2022Q4 27 82 3.04
41 2023Q1 32 14 0.44
42 2023Q2 32 15 0.47
43 2023Q3 50 14 0.28
44 2023Q4 19 17 0.89
45 2024Q1 33 11 0.33
46 2024Q2 53 21 0.40
47 2024Q3 22 6 0.27
48 2024Q4 17 17 1.00
49 2025Q1 56 86 1.54
50 2025Q2 43 36 0.84
51 2025Q3 34 35 1.03
Extended dataset saved as 'kachin_quarterly_extended.csv'
SUMMARY STATISTICS:
Quarters with data: 52
Total events: 829
Total fatalities: 526
Average events per quarter: 15.94
Average fatalities per quarter: 10.12
Most violent quarter: 2025Q1 (86 fatalities)
Most active quarter: 2021Q2 (65 events)
# Visualize the quarterly time series data
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Kachin State Quarterly Time Series Analysis', fontsize=16, fontweight='bold')
# 1. Basic quarterly events line chart
axes[0, 0].plot(range(len(kachin_time_series)), kachin_time_series['Events'],
linewidth=3, marker='o', markersize=6, color='red', alpha=0.8)
axes[0, 0].set_title('Quarterly Events in Kachin State', fontweight='bold')
axes[0, 0].set_xlabel('Quarter Index')
axes[0, 0].set_ylabel('Number of Events')
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].tick_params(axis='x', rotation=45)
# Add quarter labels
axes[0, 0].set_xticks(range(len(kachin_time_series)))
axes[0, 0].set_xticklabels(kachin_time_series['Time'], rotation=45, ha='right')
# 2. Quarterly fatalities
axes[0, 1].plot(range(len(extended_time_series)), extended_time_series['Fatalities'],
linewidth=3, marker='s', markersize=6, color='darkred', alpha=0.8)
axes[0, 1].set_title('Quarterly Fatalities in Kachin State', fontweight='bold')
axes[0, 1].set_xlabel('Quarter Index')
axes[0, 1].set_ylabel('Number of Fatalities')
axes[0, 1].grid(True, alpha=0.3)
axes[0, 1].set_xticks(range(len(extended_time_series)))
axes[0, 1].set_xticklabels(extended_time_series['Time'], rotation=45, ha='right')
# 3. Events vs Fatalities scatter plot
axes[1, 0].scatter(extended_time_series['Events'], extended_time_series['Fatalities'],
s=100, color='blue', alpha=0.7, edgecolors='black')
axes[1, 0].set_title('Events vs Fatalities Correlation', fontweight='bold')
axes[1, 0].set_xlabel('Number of Events')
axes[1, 0].set_ylabel('Number of Fatalities')
axes[1, 0].grid(True, alpha=0.3)
# Add correlation coefficient
correlation = extended_time_series['Events'].corr(extended_time_series['Fatalities'])
axes[1, 0].text(0.05, 0.95, f'Correlation: {correlation:.3f}',
transform=axes[1, 0].transAxes, fontsize=12,
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
# 4. Bar chart of quarterly events
bars = axes[1, 1].bar(range(len(kachin_time_series)), kachin_time_series['Events'],
color='orange', alpha=0.7, edgecolor='black', linewidth=1)
axes[1, 1].set_title('Quarterly Events Bar Chart', fontweight='bold')
axes[1, 1].set_xlabel('Quarter')
axes[1, 1].set_ylabel('Number of Events')
axes[1, 1].grid(True, alpha=0.3, axis='y')
axes[1, 1].set_xticks(range(len(kachin_time_series)))
axes[1, 1].set_xticklabels(kachin_time_series['Time'], rotation=45, ha='right')
# Add value labels on bars
for i, bar in enumerate(bars):
height = bar.get_height()
axes[1, 1].text(bar.get_x() + bar.get_width()/2., height + 0.1,
f'{int(height)}', ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
plt.show()
# Display the final clean dataset
print("\nFINAL TIME SERIES DATASET FOR ANALYSIS:")
print("=" * 50)
print("Two-column dataset (Time, Events):")
print(kachin_time_series)
print(f"\nDataset ready for time series analysis!")
print(f"Use 'kachin_time_series' variable or load from 'kachin_quarterly_events.csv'")
FINAL TIME SERIES DATASET FOR ANALYSIS:
==================================================
Two-column dataset (Time, Events):
Time Events
0 2010Q2 1
1 2011Q2 16
2 2011Q3 6
3 2011Q4 34
4 2012Q1 5
5 2012Q2 11
6 2012Q3 5
7 2012Q4 10
8 2013Q1 7
9 2013Q3 5
10 2013Q4 9
11 2014Q1 4
12 2014Q2 2
13 2014Q3 1
14 2014Q4 2
15 2015Q1 2
16 2015Q2 8
17 2015Q3 2
18 2015Q4 9
19 2016Q2 8
20 2016Q3 9
21 2016Q4 27
22 2017Q1 11
23 2017Q2 3
24 2017Q3 9
25 2017Q4 8
26 2018Q1 12
27 2018Q2 15
28 2018Q3 10
29 2018Q4 3
30 2019Q1 2
31 2019Q2 2
32 2020Q3 1
33 2021Q1 7
34 2021Q2 65
35 2021Q3 20
36 2021Q4 13
37 2022Q1 15
38 2022Q2 9
39 2022Q3 23
40 2022Q4 27
41 2023Q1 32
42 2023Q2 32
43 2023Q3 50
44 2023Q4 19
45 2024Q1 33
46 2024Q2 53
47 2024Q3 22
48 2024Q4 17
49 2025Q1 56
50 2025Q2 43
51 2025Q3 34
Dataset ready for time series analysis!
Use 'kachin_time_series' variable or load from 'kachin_quarterly_events.csv'
# Data quality check and basic statistics
print("Data Quality Check")
print("=" * 30)
print(f"Missing values per column:")
missing_data = acled_data.isnull().sum()
for col, missing in missing_data.items():
if missing > 0:
print(f" {col}: {missing:,} ({missing/len(acled_data)*100:.1f}%)")
print(f"\nData types:")
print(acled_data.dtypes)
print(f"\nBasic statistics for numeric columns:")
acled_data.describe()
Data Quality Check ============================== Missing values per column: assoc_actor_1: 16,938 (85.8%) actor2: 3,098 (15.7%) assoc_actor_2: 11,269 (57.1%) inter2: 3,098 (15.7%) civilian_targeting: 5,744 (29.1%) admin2: 4 (0.0%) admin3: 217 (1.1%) tags: 19,431 (98.4%) Data types: event_id_cnty object event_date datetime64[ns] year int64 time_precision int64 disorder_type object event_type object sub_event_type object actor1 object assoc_actor_1 object inter1 object actor2 object assoc_actor_2 object inter2 object interaction object civilian_targeting object iso int64 region object country object admin1 object admin2 object admin3 object location object latitude float64 longitude float64 geo_precision int64 source object source_scale object notes object fatalities int64 tags object timestamp int64 dtype: object Basic statistics for numeric columns:
| year | time_precision | iso | latitude | longitude | geo_precision | fatalities | timestamp | |
|---|---|---|---|---|---|---|---|---|
| count | 19739.000000 | 19739.000000 | 19739.000000 | 19739.000000 | 19739.000000 | 19739.000000 | 19739.000000 | 1.973900e+04 |
| mean | 2021.663002 | 1.089468 | 229.395613 | 18.103681 | 102.809433 | 1.315872 | 0.990678 | 1.742098e+09 |
| std | 3.200041 | 0.315442 | 214.182038 | 6.059362 | 12.218299 | 0.485452 | 3.388604 | 2.976182e+07 |
| min | 2010.000000 | 1.000000 | 104.000000 | -10.732300 | 92.183700 | 1.000000 | 0.000000 | 1.552577e+09 |
| 25% | 2020.000000 | 1.000000 | 104.000000 | 14.829300 | 95.290650 | 1.000000 | 0.000000 | 1.744085e+09 |
| 50% | 2023.000000 | 1.000000 | 104.000000 | 20.223600 | 96.481300 | 1.000000 | 0.000000 | 1.753971e+09 |
| 75% | 2024.000000 | 1.000000 | 360.000000 | 22.210400 | 106.630000 | 2.000000 | 1.000000 | 1.754411e+09 |
| max | 2025.000000 | 3.000000 | 764.000000 | 27.505300 | 140.850400 | 3.000000 | 243.000000 | 1.758048e+09 |
# Convert date column to datetime for better analysis
acled_data['event_date'] = pd.to_datetime(acled_data['event_date'])
# Extract additional date components
acled_data['month'] = acled_data['event_date'].dt.month
acled_data['day_of_year'] = acled_data['event_date'].dt.dayofyear
# Display data info after date conversion
print("After date conversion:")
print(f"Date range: {acled_data['event_date'].min()} to {acled_data['event_date'].max()}")
print(f"Unique years: {sorted(acled_data['year'].unique())}")
After date conversion: Date range: 2010-01-01 00:00:00 to 2025-09-12 00:00:00 Unique years: [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]
# Filter for Myanmar data specifically (since this is about Kachin conflict)
myanmar_data = acled_data[acled_data['country'] == 'Myanmar'].copy()
print("Myanmar-specific ACLED Data")
print("=" * 40)
print(f"Myanmar events: {len(myanmar_data):,}")
print(f"Date range: {myanmar_data['event_date'].min()} to {myanmar_data['event_date'].max()}")
print(f"Total fatalities in Myanmar: {myanmar_data['fatalities'].sum():,}")
print(f"States/Regions: {myanmar_data['admin1'].nunique()}")
print(f"Unique states: {sorted(myanmar_data['admin1'].unique())}")
print(f"\nEvent types in Myanmar:")
print(myanmar_data['event_type'].value_counts())
Myanmar-specific ACLED Data ======================================== Myanmar events: 14,460 Date range: 2010-01-01 00:00:00 to 2025-09-12 00:00:00 Total fatalities in Myanmar: 13,378 States/Regions: 18 Unique states: ['Ayeyarwady', 'Bago-East', 'Bago-West', 'Chin', 'Kachin', 'Kayah', 'Kayin', 'Magway', 'Mandalay', 'Mon', 'Nay Pyi Taw', 'Rakhine', 'Sagaing', 'Shan-East', 'Shan-North', 'Shan-South', 'Tanintharyi', 'Yangon'] Event types in Myanmar: Explosions/Remote violence 7487 Violence against civilians 6973 Name: event_type, dtype: int64
# Focus on Kachin State specifically
kachin_data = myanmar_data[myanmar_data['admin1'] == 'Kachin'].copy()
# Create line graphs for temporal analysis
# 1. Events over time for Myanmar
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('ACLED Data Analysis - Myanmar Conflict Trends', fontsize=16, fontweight='bold')
# Monthly events in Myanmar over time
monthly_events = myanmar_data.groupby(['year', 'month']).size().reset_index(name='event_count')
monthly_events['date'] = pd.to_datetime(monthly_events[['year', 'month']].assign(day=1))
axes[0, 0].plot(monthly_events['date'], monthly_events['event_count'], linewidth=2, color='red', alpha=0.7)
axes[0, 0].set_title('Monthly Events in Myanmar Over Time', fontweight='bold')
axes[0, 0].set_xlabel('Year')
axes[0, 0].set_ylabel('Number of Events')
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].tick_params(axis='x', rotation=45)
# Monthly fatalities in Myanmar
monthly_fatalities = myanmar_data.groupby(['year', 'month'])['fatalities'].sum().reset_index()
monthly_fatalities['date'] = pd.to_datetime(monthly_fatalities[['year', 'month']].assign(day=1))
axes[0, 1].plot(monthly_fatalities['date'], monthly_fatalities['fatalities'], linewidth=2, color='darkred', alpha=0.7)
axes[0, 1].set_title('Monthly Fatalities in Myanmar Over Time', fontweight='bold')
axes[0, 1].set_xlabel('Year')
axes[0, 1].set_ylabel('Number of Fatalities')
axes[0, 1].grid(True, alpha=0.3)
axes[0, 1].tick_params(axis='x', rotation=45)
# Annual events by state (top 5 states)
top_states = myanmar_data['admin1'].value_counts().head(5).index
annual_by_state = myanmar_data[myanmar_data['admin1'].isin(top_states)].groupby(['year', 'admin1']).size().reset_index(name='event_count')
for state in top_states:
state_data = annual_by_state[annual_by_state['admin1'] == state]
axes[1, 0].plot(state_data['year'], state_data['event_count'], marker='o', linewidth=2, label=state, alpha=0.8)
axes[1, 0].set_title('Annual Events by State (Top 5)', fontweight='bold')
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Number of Events')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)
# Event types over time
event_types_over_time = myanmar_data.groupby(['year', 'event_type']).size().reset_index(name='event_count')
major_event_types = myanmar_data['event_type'].value_counts().head(4).index
for event_type in major_event_types:
type_data = event_types_over_time[event_types_over_time['event_type'] == event_type]
axes[1, 1].plot(type_data['year'], type_data['event_count'], marker='s', linewidth=2, label=event_type, alpha=0.8)
axes[1, 1].set_title('Major Event Types Over Time', fontweight='bold')
axes[1, 1].set_xlabel('Year')
axes[1, 1].set_ylabel('Number of Events')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Kachin-specific analysis with line graphs
if len(kachin_data) > 0:
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Kachin State Conflict Analysis', fontsize=16, fontweight='bold')
# Monthly events in Kachin
kachin_monthly = kachin_data.groupby(['year', 'month']).size().reset_index(name='event_count')
kachin_monthly['date'] = pd.to_datetime(kachin_monthly[['year', 'month']].assign(day=1))
axes[0, 0].plot(kachin_monthly['date'], kachin_monthly['event_count'],
linewidth=3, color='orange', marker='o', markersize=4)
axes[0, 0].set_title('Monthly Events in Kachin State', fontweight='bold')
axes[0, 0].set_xlabel('Year')
axes[0, 0].set_ylabel('Number of Events')
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].tick_params(axis='x', rotation=45)
# Monthly fatalities in Kachin
kachin_fatalities = kachin_data.groupby(['year', 'month'])['fatalities'].sum().reset_index()
kachin_fatalities['date'] = pd.to_datetime(kachin_fatalities[['year', 'month']].assign(day=1))
axes[0, 1].plot(kachin_fatalities['date'], kachin_fatalities['fatalities'],
linewidth=3, color='darkorange', marker='s', markersize=4)
axes[0, 1].set_title('Monthly Fatalities in Kachin State', fontweight='bold')
axes[0, 1].set_xlabel('Year')
axes[0, 1].set_ylabel('Number of Fatalities')
axes[0, 1].grid(True, alpha=0.3)
axes[0, 1].tick_params(axis='x', rotation=45)
# Annual events by district in Kachin
kachin_districts = kachin_data['admin2'].value_counts().head(4).index
annual_by_district = kachin_data[kachin_data['admin2'].isin(kachin_districts)].groupby(['year', 'admin2']).size().reset_index(name='event_count')
colors = ['blue', 'green', 'purple', 'brown']
for i, district in enumerate(kachin_districts):
district_data = annual_by_district[annual_by_district['admin2'] == district]
axes[1, 0].plot(district_data['year'], district_data['event_count'],
marker='o', linewidth=2, label=district, color=colors[i], alpha=0.8)
axes[1, 0].set_title('Annual Events by District in Kachin', fontweight='bold')
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Number of Events')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)
# Event types in Kachin over time
kachin_event_types = kachin_data.groupby(['year', 'event_type']).size().reset_index(name='event_count')
kachin_major_types = kachin_data['event_type'].value_counts().head(4).index
colors = ['red', 'blue', 'green', 'purple']
for i, event_type in enumerate(kachin_major_types):
type_data = kachin_event_types[kachin_event_types['event_type'] == event_type]
axes[1, 1].plot(type_data['year'], type_data['event_count'],
marker='s', linewidth=2, label=event_type, color=colors[i], alpha=0.8)
axes[1, 1].set_title('Event Types in Kachin Over Time', fontweight='bold')
axes[1, 1].set_xlabel('Year')
axes[1, 1].set_ylabel('Number of Events')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
else:
print("No Kachin data available for visualization")
# Comparative line graph: Myanmar vs Kachin
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle('Comparative Analysis: Myanmar vs Kachin State', fontsize=16, fontweight='bold')
# Annual events comparison
myanmar_annual = myanmar_data.groupby('year').size().reset_index(name='myanmar_events')
kachin_annual = kachin_data.groupby('year').size().reset_index(name='kachin_events')
axes[0].plot(myanmar_annual['year'], myanmar_annual['myanmar_events'],
linewidth=3, color='red', marker='o', label='Myanmar Total', alpha=0.8)
if len(kachin_data) > 0:
axes[0].plot(kachin_annual['year'], kachin_annual['kachin_events'],
linewidth=3, color='orange', marker='s', label='Kachin State', alpha=0.8)
axes[0].set_title('Annual Events: Myanmar vs Kachin', fontweight='bold')
axes[0].set_xlabel('Year')
axes[0].set_ylabel('Number of Events')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# Annual fatalities comparison
myanmar_fatalities_annual = myanmar_data.groupby('year')['fatalities'].sum().reset_index(name='myanmar_fatalities')
kachin_fatalities_annual = kachin_data.groupby('year')['fatalities'].sum().reset_index(name='kachin_fatalities')
axes[1].plot(myanmar_fatalities_annual['year'], myanmar_fatalities_annual['myanmar_fatalities'],
linewidth=3, color='darkred', marker='o', label='Myanmar Total', alpha=0.8)
if len(kachin_data) > 0:
axes[1].plot(kachin_fatalities_annual['year'], kachin_fatalities_annual['kachin_fatalities'],
linewidth=3, color='darkorange', marker='s', label='Kachin State', alpha=0.8)
axes[1].set_title('Annual Fatalities: Myanmar vs Kachin', fontweight='bold')
axes[1].set_xlabel('Year')
axes[1].set_ylabel('Number of Fatalities')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Create monthly time series data for Kachin State
print("Creating Monthly Time Series Data for Kachin State")
print("=" * 60)
# Create monthly periods
kachin_data['month'] = kachin_data['event_date'].dt.to_period('M')
# Aggregate events by month
monthly_events = kachin_data.groupby('month').agg({
'event_id_cnty': 'count', # Count of events
'fatalities': ['sum', 'mean'], # Total and average fatalities
'event_date': ['min', 'max'] # Date range for each month
}).round(2)
# Flatten column names
monthly_events.columns = ['events_count', 'total_fatalities', 'avg_fatalities', 'month_start', 'month_end']
monthly_events = monthly_events.reset_index()
# Create a clean two-column dataset: Time and Events
kachin_time_series = monthly_events[['month', 'events_count']].copy()
kachin_time_series.columns = ['Time', 'Events']
print("Monthly Time Series Data for Kachin State:")
print("-" * 50)
print(kachin_time_series)
print(f"\nDataset Shape: {kachin_time_series.shape}")
print(f"Time Period: {kachin_time_series['Time'].min()} to {kachin_time_series['Time'].max()}")
print(f"Total Events: {kachin_time_series['Events'].sum()}")
print(f"Average Events per Month: {kachin_time_series['Events'].mean():.2f}")
# Save to CSV for external use
kachin_time_series.to_csv('kachin_monthly_events.csv', index=False)
print(f"\nDataset saved as 'kachin_monthly_events.csv'")
Creating Monthly Time Series Data for Kachin State
============================================================
Monthly Time Series Data for Kachin State:
--------------------------------------------------
Time Events
0 2010-05 1
1 2011-06 16
2 2011-07 1
3 2011-08 2
4 2011-09 3
.. ... ...
124 2025-05 10
125 2025-06 10
126 2025-07 13
127 2025-08 15
128 2025-09 6
[129 rows x 2 columns]
Dataset Shape: (129, 2)
Time Period: 2010-05 to 2025-09
Total Events: 829
Average Events per Month: 6.43
Dataset saved as 'kachin_monthly_events.csv'
# Libraries to do data manipulation
import numpy as np
import pandas as pd
# Library to do data visualization
import matplotlib.pyplot as plt
# Library to do time series decomposition
import statsmodels.api as sm
# Module to create ACF and PACF plots
from statsmodels.graphics import tsaplots
# Module to build AR, MA, ARMA, and ARIMA models
from statsmodels.tsa.arima.model import ARIMA
# Module to implement MSE and RSME during model evaluation
from sklearn.metrics import mean_squared_error
# Code for ignoring unnecessary warnings while executing some code
import warnings
warnings.filterwarnings("ignore")
# To perform seasonal decomposition, we need a numeric time series with a proper datetime index.
# Set the 'Time' column as the index and ensure it is a datetime or period index.
kachin_time_series_indexed = kachin_time_series.copy()
kachin_time_series_indexed.set_index('Time', inplace=True)
# Use only the numeric 'Events' column for decomposition
events_series = kachin_time_series_indexed['Events']
# Perform seasonal decomposition on the numeric series
decomposition = sm.tsa.seasonal_decompose(events_series, model='additive', period=4)
# Creating a dataframe to store the individual components
decomposed_data = pd.DataFrame({
'trend': decomposition.trend,
'seasonal': decomposition.seasonal,
'random_noise': decomposition.resid
})
fig, (ax1, ax2, ax3) = plt.subplots(nrows = 3, ncols = 1, figsize = (20, 16))
decomposed_data['trend'].plot(ax = ax1, label='Trend')
ax1.set_ylabel('Trend')
ax1.legend()
decomposed_data['seasonal'].plot(ax = ax2, label='Seasonal')
ax2.set_ylabel('Seasonal')
ax2.legend()
decomposed_data['random_noise'].plot(ax = ax3, label='Random Noise')
ax3.set_ylabel('Random Noise')
ax3.legend()
<matplotlib.legend.Legend at 0x305d7cca0>
# Split the data into approximately 80% train and 20% test based on time order
split_idx = int(len(kachin_time_series_indexed) * 0.8)
train_data = kachin_time_series_indexed.iloc[:split_idx]
test_data = kachin_time_series_indexed.iloc[split_idx:]
# Visualize the train and test data in the same plot
fig, ax = plt.subplots(figsize = (16, 6))
# Plotting train data
train_data['Events'].plot(ax = ax)
# Plotting test data
test_data['Events'].plot(ax = ax)
# Adding the legends in sequential order
plt.legend(['train data', 'test data'])
# Showing the time which divides the original data into train and test
plt.axvline(x = test_data.index[0], color = 'black', linestyle = '--')
# Showing the plot
plt.show()
# Importing ADF test from statsmodels package
from statsmodels.tsa.stattools import adfuller
# Implementing ADF test on the original time series data
result = adfuller(train_data['Events'])
# Printing the results
print(result[0])
print(result[1]) # To get the p-value
print(result[4])
-6.5121508137266115
1.0936377165190133e-08
{'1%': -3.4961490537199116, '5%': -2.8903209639580556, '10%': -2.5821223452518263}
# Creating two subplots to show ACF and PACF plots
fig, (ax1, ax2) = plt.subplots(nrows = 1, ncols = 2, figsize = (16, 6))
# Creating and plotting the ACF charts starting from lag = 1
tsaplots.plot_acf(train_data, zero = False, ax = ax1)
# Creating and plotting the ACF charts starting from lag = 1 till lag = 8
tsaplots.plot_pacf(train_data, zero = False, ax = ax2, lags = 8)
plt.show()
import pmdarima as pm
# Fix: Use only the numeric values from the Series, not the Period index
# Extract the numeric values from the Events column
train_values = train_data['Events'].values
auto_arima_model = pm.auto_arima(train_values, d = 0, seasonal = False, trace = True,
error_action = 'ignore', suppress_warnings = True)
print(auto_arima_model.summary())
Performing stepwise search to minimize aic
ARIMA(2,0,2)(0,0,0)[0] : AIC=627.958, Time=0.04 sec
ARIMA(0,0,0)(0,0,0)[0] : AIC=700.933, Time=0.00 sec
ARIMA(1,0,0)(0,0,0)[0] : AIC=637.938, Time=0.00 sec
ARIMA(0,0,1)(0,0,0)[0] : AIC=662.742, Time=0.00 sec
ARIMA(1,0,2)(0,0,0)[0] : AIC=627.126, Time=0.02 sec
ARIMA(0,0,2)(0,0,0)[0] : AIC=649.082, Time=0.01 sec
ARIMA(1,0,1)(0,0,0)[0] : AIC=634.169, Time=0.01 sec
ARIMA(1,0,3)(0,0,0)[0] : AIC=627.512, Time=0.03 sec
ARIMA(0,0,3)(0,0,0)[0] : AIC=650.490, Time=0.01 sec
ARIMA(2,0,1)(0,0,0)[0] : AIC=640.126, Time=0.02 sec
ARIMA(2,0,3)(0,0,0)[0] : AIC=626.689, Time=0.04 sec
ARIMA(3,0,3)(0,0,0)[0] : AIC=inf, Time=0.09 sec
ARIMA(2,0,4)(0,0,0)[0] : AIC=inf, Time=0.10 sec
ARIMA(1,0,4)(0,0,0)[0] : AIC=628.292, Time=0.05 sec
ARIMA(3,0,2)(0,0,0)[0] : AIC=inf, Time=0.12 sec
ARIMA(3,0,4)(0,0,0)[0] : AIC=inf, Time=0.12 sec
ARIMA(2,0,3)(0,0,0)[0] intercept : AIC=625.028, Time=0.11 sec
ARIMA(1,0,3)(0,0,0)[0] intercept : AIC=625.838, Time=0.10 sec
ARIMA(2,0,2)(0,0,0)[0] intercept : AIC=626.374, Time=0.06 sec
ARIMA(3,0,3)(0,0,0)[0] intercept : AIC=627.163, Time=0.11 sec
ARIMA(2,0,4)(0,0,0)[0] intercept : AIC=619.026, Time=0.13 sec
ARIMA(1,0,4)(0,0,0)[0] intercept : AIC=622.252, Time=0.07 sec
ARIMA(3,0,4)(0,0,0)[0] intercept : AIC=inf, Time=0.14 sec
ARIMA(2,0,5)(0,0,0)[0] intercept : AIC=626.192, Time=0.17 sec
ARIMA(1,0,5)(0,0,0)[0] intercept : AIC=624.287, Time=0.05 sec
ARIMA(3,0,5)(0,0,0)[0] intercept : AIC=inf, Time=0.20 sec
Best model: ARIMA(2,0,4)(0,0,0)[0] intercept
Total fit time: 1.823 seconds
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 103
Model: SARIMAX(2, 0, 4) Log Likelihood -301.513
Date: Fri, 19 Sep 2025 AIC 619.026
Time: 19:00:34 BIC 640.104
Sample: 0 HQIC 627.564
- 103
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 13.9358 3.871 3.600 0.000 6.348 21.523
ar.L1 -1.0185 0.192 -5.306 0.000 -1.395 -0.642
ar.L2 -0.7852 0.132 -5.950 0.000 -1.044 -0.527
ma.L1 1.5264 0.226 6.754 0.000 1.083 1.969
ma.L2 1.4972 0.287 5.216 0.000 0.935 2.060
ma.L3 0.5124 0.197 2.602 0.009 0.126 0.898
ma.L4 0.2151 0.147 1.458 0.145 -0.074 0.504
sigma2 19.5126 3.697 5.278 0.000 12.266 26.759
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 262.57
Prob(Q): 1.00 Prob(JB): 0.00
Heteroskedasticity (H): 1.85 Skew: 1.96
Prob(H) (two-sided): 0.08 Kurtosis: 9.77
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Plot model diagnostics to assess the quality of the ARIMA model
# This creates 4 diagnostic plots: residuals, Q-Q plot, standardized residuals, and ACF of residuals
auto_arima_model.plot_diagnostics(figsize=(12, 8))
plt.tight_layout()
plt.show()
# Review of ARIMA model fitting and forecasting code
from statsmodels.tsa.arima.model import ARIMA
# Get the best (p,d,q) order from auto_arima
best_order = auto_arima_model.order
# Fit ARIMA on the full time series using the best order
final_model = ARIMA(kachin_time_series_indexed, order=best_order)
final_model_results = final_model.fit()
# Forecasting:
# The original code attempts to forecast 20 months ahead by adding pd.DateOffset(months=20) to the last date.
# However, if the index is a PeriodIndex (e.g., with freq='Q' or 'M'), adding a DateOffset can cause an IncompatibleFrequency error.
# Instead, use the forecast() method with steps=N, or use predict() with integer positions, or ensure the offset matches the index frequency.
# Get the last date in the time series index
last_date = kachin_time_series_indexed.index[-1]
# Determine forecast horizon (e.g., 20 periods ahead)
forecast_periods = 20
# If the index is a PeriodIndex, use forecast(steps=forecast_periods)
# This is robust for both PeriodIndex and DatetimeIndex
forecasted_ARIMA = final_model_results.forecast(steps=forecast_periods)
# Plotting the original time seris with forecast
plt.figure(figsize=(16, 8))
# Plot the original time series
plt.plot(kachin_time_series_indexed.index.to_timestamp(), kachin_time_series_indexed.values, color='c', label='Original Series')
# --- Add auto_arima model predictions on both train and test data ---
# Get predictions from auto_arima on train data
autoarima_train_pred = pd.Series(
auto_arima_model.predict_in_sample(),
index=train_data.index
)
# Plot auto_arima predictions for train data
plt.plot(autoarima_train_pred.index.to_timestamp(), autoarima_train_pred.values,
color='red', linestyle='--', label='auto_arima Fit (Train)')
plt.title('Actual vs Predicted (ARIMA and auto_arima)')
plt.legend()
plt.show()